package E3DS::BounceParser;

use strict;
use warnings;

use Mail::DeliveryStatus::BounceParser;
use Readonly;
use Data::Dumper;

Readonly my $DIRECT_MAIL_BOUNCE_HEADER => 'X-Direct-Mail-Bounce';
Readonly my $DIRECT_MAIL_AR_HEADER => 'X-Direct-Mail-AR';

sub parse
{
	my ( $class, $msg_string, $verbose ) = @_;
	    
	my $bounce = eval { 
		Mail::DeliveryStatus::BounceParser->new( $msg_string, { log => sub { print @_, "\n" if $verbose; } } );
	};
	
	if ( $verbose && ( $@ || !$bounce->is_bounce ) ) {
		print "Error instantiating BounceParser: $@";
	}

	if ( $bounce && $bounce->is_bounce ) {
		my @reports = $bounce->reports;
		my $report  = $reports[0];
        
		if ( !$report ) {
			print "No report\n" if $verbose;
			return;
		}
		
		if ( $report->get( 'action' ) !~ /failed/ ) {
			print "Report action is not 'failed': ", $report->get( 'action' ), "\n" if $verbose;
			return;
		}
		
		my $bounced_email = $report->get( 'email' );
		my $std_reason    = $report->get( 'std_reason' );
		my $reason		  = $report->get( 'reason' );
		
		print "PRE Bounced Email: $bounced_email, Standard Reason: $std_reason, Reason: $reason\n" if $verbose;
		
		# Collapse spaces
        $reason =~ s/\s+/ /g;
		
		# 550 redacted@earthlink.net...Due to extended inactivity new mail is not currently being accepted for this mailbox.
		if ( $reason =~ m/extended inactivity/i ) {
			$std_reason = "user_unknown";
		}
		
		# 550 No such person at this address
		if ( $reason =~ m/No such person/i ) {
			$std_reason = "user_unknown";
		}
		
		# 5.3.0 - Other mail system problem 554-'5.4.6 Hop count exceeded - possible mail loop' (delivery attempts: 0)
		if ( $reason =~ m/too many hops/i || $reason =~ m/loops back/i || $reason =~ m/hop count/i || $reason =~ m/mail loop/i ) {
			$std_reason = "mail_loop";
		}
		
		# 550 5.1.0 <216.218.226.98> This IP has sent too many messages this hour. IB504 <http://x.co/rlbounce>
		# was being flagged as "user_unknown", which we don't want because that would be a hard bounce
		if ( $reason =~ m/x\.co\/rlbounce/i or $reason =~ m/too many (messages|recipients)/i ) {
			$std_reason = "unknown";
		}
		
		# Unkown (sic) user redacted in domain dmg.at
		if ( $reason =~ m/Unkown user/i ) {
			$std_reason = "user_unknown";
		}
		
		# 550 INVALID RECIP imposed mailbox access for redacted@stthomas.edu refused
		if ( $reason =~ m/INVALID RECIP/i ) {
			$std_reason = "user_unknown";
		}
		
		# 550 Rule imposed mailbox access for redacted@nieuwsblad.be refused: user invalid
		if ( $reason =~ m/user invalid/i ) {
			$std_reason = "user_unknown";
		}
		
		# 550 4.4.7 QUEUE.Expired; message expired in unreachable destination queue. Reason: The mailbox recipient does not have a mailbox database
		if ( $reason =~ m/mailbox recipient does not have a mailbox database/i ) {
			$std_reason = "user_unknown";
		}
		# BounceParser is too eager to classify anything with "try again later" as "over_quota"
		if ( $std_reason eq 'over_quota' ) {
			$std_reason = $class->reclassifyOverQuota( $reason );
		}
		
		if ( $reason =~ m/inexistente/i || $reason =~ m/existe tal pessoa/i || $reason =~ m/Usuario Desconocido/i || $reason =~ m/No existe ese usuario/i || $reason =~ m/Nenhuma pessoa neste/i || $reason =~ m/existe essa pessoa neste/i ) {
			$std_reason = "user_unknown";
		}
					
		if ( $reason =~ m/Utilisateur inconnu/i ) {
			$std_reason = "user_unknown";
		}
					
		if ( $reason =~ m/User unknown/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/Address not present/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/Recipient verify failed/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/User [^\s]+ not found/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/This person no longer works/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/Address refused/i ) {
			$std_reason = "user_unknown";
		}
		
		if ( $reason =~ m/name service error/i ) {
			$std_reason = "domain_unknown";
		}
		
		if ( $reason =~ m/nonexistent domain/i ) {
			$std_reason = "domain_unknown";
		}

		if ( $reason =~ m/Domain name [^\s]+ does not resolve/i ) {
			$std_reason = "domain_unknown";
		}
		
		if ( $reason =~ m/domain is not hosted here/i ) {
			$std_reason = "domain_error";
		}
		
		if ( $reason =~ m/error writing message: File too large/i ) {
			$std_reason = 'over_quota';
		}
		
		if ( $reason =~ m/Unable to open mailbox/i ) {
			$std_reason = 'over_quota';
		}

		if ( $reason =~ m/Too Many Messages in Folder/i ) {
			$std_reason = 'over_quota';
		}

		if ( $reason =~ m/Mailbox has exceeded the limit/i ) {
			$std_reason = 'over_quota';
		}
		
		if ( $reason =~ m/Mailbox full/i ) {
			$std_reason = 'over_quota';
		}

		if ( $reason =~ m/disk space/i ) {
			$std_reason = 'over_quota';
		}
		
		if ( $reason =~ m/No mailbox here by that name/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/validrcptto/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/no mailbox/i ) {
			$std_reason = "user_unknown";
		}

		# 550 RC:LD The email account that you tried to reach does no exists
		if ( $reason =~ m/email account that you tried to reach does not? exists?/i ) {
			$std_reason = "user_unknown";
		}

		if ( $reason =~ m/Mailbox is frozen/i ) {
			$std_reason = "user_disabled";
		}

		if ( $reason =~ m/unavailable mail user/i ) {
			$std_reason = "user_disabled";
		}
		
		# Sometimes Mail::DeliveryStatus::BounceParser gets it wrong, so we try and catch some cases
		if ( $reason =~ m/\b(blacklists?|spam|content|standards|ClamAV)\b/i ) {
			$std_reason = "spam";
		}
		
		# See http://support.google.com/postini/bin/answer.py?hl=en&answer=2644553
		if ( $reason eq '550 mailbox unavailable - psmtp' ) {
			$std_reason = "spam";
		}
		
		# See 521 5.2.1 : (HVU:B2) http://postmaster.info.aol.com/errors/554hvub2.html
		if ( $reason =~ m/5\.2\.1/ && $reason =~ m/\.aol.\com/i ) {
			$std_reason = "unknown";
		}
		
		print "POST Bounced Email: $bounced_email, Standard Reason: $std_reason, Reason: $reason\n" if $verbose;
								
		my $dm_bounce_header = $class->extractHeader( $DIRECT_MAIL_BOUNCE_HEADER, $bounce, $msg_string );
		my $delivery_id;

		print "$DIRECT_MAIL_BOUNCE_HEADER is $dm_bounce_header\n" if $verbose;

		if ( $dm_bounce_header ) {
			if ( $dm_bounce_header =~ m/([^;\s]+); ([^;\s]+); R$/ ) {
				$delivery_id   = $1;
				$bounced_email = $2;
				$bounced_email =~ tr/n-za-mN-ZA-M/a-zA-Z/;
			}
			elsif ( $dm_bounce_header =~ m/([^;\s]+); R$/ ) {
				$bounced_email = $1;
				$bounced_email =~ tr/n-za-mN-ZA-M/a-zA-Z/;
			}
			else {
				warn "Couldn't parse bounce header: $dm_bounce_header";
			}
		}
		
		my $dm_ar_header = $class->extractHeader( $DIRECT_MAIL_AR_HEADER, $bounce, $msg_string );
		my $address_history_id;
		
		print "$DIRECT_MAIL_AR_HEADER is $dm_ar_header\n" if $dm_ar_header && $verbose;
		
		if ( $dm_ar_header && $dm_ar_header =~ m/^v1; ([a-z0-9_-]+)/i ) {
			$address_history_id = $1;
		}
		
		# Try and avoid marking the original recipient as bounced when it was actually a Cc or Bcc recipient
		# that bounced
		my $report_email = $report->get('email');
		
		if ( $report_email && $bounced_email && lc($report_email) ne lc($bounced_email) ) {
			warn "The bounced recipient ($report_email) was not the same as the original recipient ($bounced_email). Skipping.";
			return;
		}
		
		my $results_ref = {
			Reason => $reason,
			StandardReason => $std_reason,
			FullText => $msg_string,
			Relay => $report->get('host'),
		};
		
		$results_ref->{ Email } = $bounced_email if $bounced_email;
		$results_ref->{ DeliveryID } = $delivery_id if $delivery_id;
		$results_ref->{ AddressHistoryID } = $address_history_id if $address_history_id;

		print Dumper( $results_ref ) if $verbose;

		return $results_ref;
	}	
	
	return;
}

sub reclassifyOverQuota
{
	my    $class = shift;
	local $_     = shift;
	
    return "over_quota" if (/mailbox\b.*\bfull/ or
		/storage/i          or
		/quota/i            or
		/\s552\s/           or
		/\s#?5\.2\.2\s/     or                                # rfc 1893
		/User\s+mailbox\s+exceeds\s+allowed\s+size/i or
		/Mailbox\s+size\s+limit\s+exceeded/i or
		/max\s+message\s+size\s+exceeded/i or
		/Benutzer\s+hat\s+zuviele\s+Mails\s+auf\s+dem\s+Server/i or
		/exceeded\s+its\s+disk\s+space\s+limit/i
    );
	
	return "unknown";
}

sub extractHeader
{
	my ( $class, $field, $bounce, $msg_string ) = @_;
	my $header;

	if ( $bounce->orig_message && $bounce->orig_message->head ) {
		$header = $bounce->orig_message->head->get( $field );
	}

	if ( !$header && $bounce->orig_header ) {
		$header = $bounce->orig_header->get( $field );
	}

	if ( !$header && $bounce->orig_text && $bounce->orig_text =~ m/$field: (.+)/ ) {
		$header = $1;
	}

	if ( !$header && $msg_string && $msg_string =~ m/$field: (.+)\b/ ) {
		$header = $1;
	}
	
	return $header;
}

1;